##### Create 'not in' operator
"%!in%" <- function(x,table) match(x,table, nomatch = 0) == 0

##### Prepare object to write into a file
prepare2write <- function (x) {
  
  x2write <- cbind(rownames(x), x)
  colnames(x2write) <- c("Gene",colnames(x))
  return(x2write)
}

##### Combine sample expression profile with reference datasets. This function outputs a vector with first element containing the merged data and second element containing merged targets info
combineDataets <- function(sample_name, sample_counts, ref_dataset) {
  
  ##### Read file with reference datasets information
  DatasetInput=read.table(ref_dataset, sep="\t", as.is=TRUE, header=TRUE, row.names=1)
  
  ##### Extract info about target file for the first dataset
  fileInfo = strsplit(DatasetInput[,"Target_file"], split='/', fixed=TRUE)
  targetFile <- read.table(DatasetInput[1,"Target_file"], sep="\t", as.is=TRUE, header=TRUE)[,c(1:4)]
  rownames(targetFile) <- targetFile[,"Sample_name"]
  targetFile <- cbind(targetFile[,2:4],rownames(DatasetInput[1,]))
  colnames(targetFile)[ncol(targetFile)] <- "Dataset"
  
  if ( nrow(DatasetInput) > 1 ) {
    for ( i in 2:nrow(DatasetInput) ) {
        
      ##### Create a temporary object to store info from the remaining target files
      targetFileTmp <- read.table(DatasetInput[i,"Target_file"], sep="\t", as.is=TRUE, header=TRUE)[,c(1:4)]
      rownames(targetFileTmp) <- targetFileTmp[,"Sample_name"]
      targetFileTmp <- cbind(targetFileTmp[,2:4],rownames(DatasetInput[i,]))
      colnames(targetFileTmp)[ncol(targetFileTmp)] <- "Dataset"
        
      targetFile <- rbind(targetFile, targetFileTmp)
    }
  }  
  
  ##### Add sample info
  sampleTargetFile <- data.frame(sample_counts, sample_name, NA, sample_name)
  names(sampleTargetFile) <- names(targetFile)
  rownames(sampleTargetFile) <- sample_name
  targetFile <- rbind( targetFile, sampleTargetFile )
  
  ##### Make syntactically valid names
  rownames(targetFile) <- make.names(rownames(targetFile))
  
  ##### Read sample read count file and combine it with reference datasets
  datasets.comb=read.table(sample_counts, sep="\t", as.is=TRUE, header=FALSE, row.names=NULL)
  names(datasets.comb) <- c("", sample_name)
      
  ##### list genes present in the read count file
  gene_list <- as.vector(datasets.comb[,1])
      
  ##### Loop through the expression data from different datasets and merge them into one matrix
  for ( data_matrix in DatasetInput[ , "Expression_matrix" ] ) {
    
    ##### Add data from the reference datasets
    dataset <- as.data.frame( read.table(data_matrix, header=TRUE, sep="\t", row.names=NULL) )
      
    ##### list genes present in individal files
    gene_list <- c( gene_list, as.vector(dataset[,1]) )
    
    ##### Merge the expression datasets and make sure that the genes order is the same
    datasets.comb <- merge( datasets.comb, dataset, by=1, all = FALSE, sort= TRUE)
      
    ##### Remove per-sample data for merged samples to free some memory
    rm(dataset)
  }
  
  ##### Use gene IDs as rownames
  rownames(datasets.comb) <- datasets.comb[,1]
  datasets.comb <- datasets.comb[, -1]
  
  ##### Make syntactically valid names
  colnames(datasets.comb) <- make.names(colnames(datasets.comb))
  
  ##### Make sure that the target file contains info only about samples present in the data matrix
  targetFile <- targetFile[ rownames(targetFile) %in% colnames(datasets.comb),  ]
  
  ##### Make sure that the samples order in the data matrix is the same as in the target file 
  datasets.comb <- datasets.comb[ , rownames(targetFile) ]
  
  ##### Identify genes that were not present across all per-sampel files and were ommited in the merged matrix
  gene_list <- unique(gene_list)
  gene_list.missing <- gene_list[ gene_list %!in% rownames(datasets.comb) ]
  
  ##### Write list of missing genes into a file
  if ( length(gene_list.missing) > 0 ) {
    write.table(prepare2write(gene_list.missing), file = paste0(params$report_dir, "/", sample_name,".missing_genes.txt"), sep="\t", quote=FALSE, row.names=TRUE, append = FALSE )
  }
  
    return( list(datasets.comb, targetFile) )
}


##### Assign colours to different groups
getTargetsColours <- function(targets) {
  
##### Predefined selection of colours for groups
targets.colours <- c("red","blue","green","darkgoldenrod","darkred","deepskyblue", "coral", "cornflowerblue", "chartreuse4", "bisque4", "chocolate3", "cadetblue3", "darkslategrey", "lightgoldenrod4", "mediumpurple4", "orangered3","indianred1","blueviolet","darkolivegreen4","darkgoldenrod4","firebrick3","deepskyblue4", "coral3", "dodgerblue1", "chartreuse3", "bisque3", "chocolate4", "cadetblue", "darkslategray4", "lightgoldenrod3", "mediumpurple3", "orangered1")
  
  f.targets <- factor(targets)
  vec.targets <- targets.colours[1:length(levels(f.targets))]
  targets.colour <- rep(0,length(f.targets))
  for(i in 1:length(f.targets))
    targets.colour[i] <- vec.targets[ f.targets[i]==levels(f.targets)]
  
  return( list(vec.targets, targets.colour) )
}

##### Assign colours to different datasets
getDatasetsColours <- function(datasets) {
  
  ##### Predefined selection of colours for datasets
  datasets.colours <- c("dodgerblue","firebrick","lightslategrey","darkseagreen","orange","darkcyan","bisque", "coral2", "cadetblue3","red","blue","green")
  
  f.datasets <- factor(datasets)
  vec.datasets <- datasets.colours[1:length(levels(f.datasets))]
  datasets.colour <- rep(0,length(f.datasets))
  for(i in 1:length(f.datasets))
    datasets.colour[i] <- vec.datasets[ f.datasets[i]==levels(f.datasets)]
  
  return( list(vec.datasets, datasets.colour) )
}

##### Perform PCA. This function outputs a list with dataframe and samples colouring info ready for plotting
pca <- function(data, target) {

  ##### Keep only genes with variance > 0 across all samples
  rsd <- apply(data,1,sd)
  data.subset <- data[rsd>0,]
  
  ##### Perform PCA
  data.subset_pca <- prcomp(t(data.subset), scale=FALSE)
  
  ##### Get variance importance for all principal components
  importance_pca <- summary(data.subset_pca)$importance[2,]
  importance_pca <- paste(round(100*importance_pca, 2), "%", sep="")
  names(importance_pca) <- names(summary(data.subset_pca)$importance[2,])
    
  ##### Prepare data frame
  data.subset_pca.df <- data.frame(target$Target, target$Dataset, data.subset_pca$x[,"PC1"], data.subset_pca$x[,"PC2"], data.subset_pca$x[,"PC3"])
  colnames(data.subset_pca.df) <- c("Target", "Dataset", "PC1", "PC2", "PC3")
  
  ##### Assigne colours to targets and datasets
  targets.colour <- getTargetsColours(target$Target)
  datasets.colour <- getDatasetsColours(target$Dataset)
  
  ##### Create a list with dataframe and samples colouring info
  pca.list <- list(data.subset_pca.df, importance_pca, targets.colour, datasets.colour)
  names(pca.list) <- c("pca.df", "importance_pca", "targets", "datasets")
  
  return( pca.list )
}
suppressMessages(library(edgeR))
suppressMessages(library(preprocessCore))
suppressMessages(library(rapportools))
suppressMessages(library(plotly))
suppressMessages(library(edgeR))
##### Create a list with reference datasets
ref_datasets <- c("mix", "core_gastrointestinal", "developmental_gastrointestinal", "pancreas", "bailey", "collisson", "moffitt")
ref_datasets.list <- vector("list", length(ref_datasets))
names(ref_datasets.list) <- ref_datasets

##### Read in reference datasets and merge them with sample data. This part outputs a vector with first element containing the merged data and second element containing merged targets info
ref_datasets.list[["mix"]] <- combineDataets(params$sample_name, params$count_file, params$datasets_mix)
ref_datasets.list[["core_gastrointestinal"]] <- combineDataets(params$sample_name, params$count_file, params$datasets_core_gastrointestinal)
ref_datasets.list[["developmental_gastrointestinal"]] <- combineDataets(params$sample_name, params$count_file, params$datasets_developmental_gastrointestinal)
ref_datasets.list[["pancreas"]] <- combineDataets(params$sample_name, params$count_file, params$datasets_pancreas)
ref_datasets.list[["bailey"]] <- combineDataets(params$sample_name, params$count_file, params$datasets_bailey)
ref_datasets.list[["collisson"]] <- combineDataets(params$sample_name, params$count_file, params$datasets_collisson)
ref_datasets.list[["moffitt"]] <- combineDataets(params$sample_name, params$count_file, params$datasets_moffitt)
##### Data transformation and filtering
##### For differential expression and related analyses, gene expression is rarely considered at the level of raw counts since libraries sequenced at a greater depth will result in higher counts. Rather, it is common practice to transform raw counts onto a scale that accounts for such library size differences. Here we convert the read count data into log2-counts per million (***log-CPM***) using function from *[edgeR](https://bioconductor.org/packages/release/bioc/html/edgeR.html)* package. Genes with very low counts across all libraries provide little evidence for differential expression. In the biological point of view, a gene must be expressed at some minimal level before it is likely to be translated into a protein or to be biologically important. In addition, the pronounced discretenes of these counts interferes with some of the statistical approximations that are used later in the pipeline. These genes should be filtered out prior to further analysis.

##### Loop through combined datasets
for ( ref in names(ref_datasets.list) ) {
  
  counts <- ref_datasets.list[[ref]][[1]]
  target <- ref_datasets.list[[ref]][[2]]
  
  ##### Create EdgeR DGEList object
  y <- DGEList(counts=counts,  group=target$Target)
  
  ##### Add datasets name for each sample
  y$samples$dataset <- target$Dataset
  
  ##### Filtering to remove low expressed genes. Users should filter with CPM rather than filtering on the counts directly, as the latter does not account for differences in library sizes between samples. Here we keep only genes that have CPM of 1
  keep <- rowSums(cpm(y)>1) >= ncol(counts)/10
  y.filtered <- y[keep, , keep.lib.sizes=FALSE]
  
  ref_datasets.list[[ref]][[3]] <- y.filtered
}

# cat("The CPM of 1 (cut-off for removing low expressed genes) corresponds to", round(min(as.numeric(colSums(counts)*1e-6)), digits=0), "reads in sample with the lowest sequencing depth, and", round(max(as.numeric(colSums(counts)*1e-6)), digits=0), "reads in sample with the greatest sequencing depth\n")

cat(nrow(y.filtered$counts), "genes remained after filtering low expressed genes, out of the total", nrow(counts), "input genes\n\n")
18045 genes remained after filtering low expressed genes, out of the total 54516 input genes
##### Data normalisation
##### During the sample preparation or sequencing process, external factors that are not of biological interest can affect the expression of individual samples. For example, samples processed in the first batch of an experiment can have higher expression overall when compared to samples processed in a second batch. It is assumed that all samples should have a similar range and distribution of expression values. Normalisation for sample-specific effectss is required to ensure that the expression distributions of each sample are similar across the entire experiment. Normalisation by the method of *[trimmed mean of M-values](https://www.ncbi.nlm.nih.gov/pubmed/20196867) (TMM)* is performed using the *calcNormFactors* function in *[edgeR](https://bioconductor.org/packages/release/bioc/html/edgeR.html)*. The normalisation factors calculated here are used as a scaling factor for the library sizes. TMM is the recommended for most RNA-Seq data where the majority (more than half) of the genes are believed not differentially expressed between any pair of the samples.

##### Adjust for RNA composition effect. Calculate scaling factors for the library sizes with calcNormFactors function using trimmed mean of M-values (TMM) between each pair of samples. Note, that the raw read counts are used to calculate the normalisation factors

##### Loop through combined datasets
for ( ref in names(ref_datasets.list) ) {
  
  y.filtered <- ref_datasets.list[[ref]][[3]]
  
  y.filtered.norm <- calcNormFactors(y.filtered, method = "TMM")
  
  ##### Transformations from the raw-scale to CPM
  y.filtered.norm.cpm <- cpm(y.filtered.norm, normalized.lib.sizes=TRUE, log=TRUE, prior.count=0.25)
  
  ref_datasets.list[[ref]][[3]] <- y.filtered.norm.cpm
}
##### Principal component analysis (PCA)
##### Loop through combined datasets and perform PCA
for ( ref in names(ref_datasets.list) ) {
  
  target <- ref_datasets.list[[ref]][[2]]
  data <- ref_datasets.list[[ref]][[3]]
  
  ref_datasets.list[[ref]][[4]] <- pca(data, target)
}

Comparison across tumour types

Principal component analysis (PCA) was performed to present the data from investigated sample in the context of patients with various tumour types derived from TCGA cohort. PCA reduces the dimensionality of data while retaining most of the variation in the dataset, making it possible to visually assess similarities and differences between the investigated sample and the various patient cohorts. The sample expression profile was projected against the four patient cohorts representing (1) unrelated tumour types (distant tumours), (2) core gastrointestinal tumours, (3) developmental gastrointestinal tumours and (4) pancreas-related lesions/tissues.

Development notes

  • run clustering analysis for individual datasets to select most similar samples in each tumour type

Distant

The expression profile of investigated sample (in red) was projected against unrelated tumour types, including (1) acute myeloid leukaemia, (2) brain lower grade glioma, (3) breast invasive carcinoma, (4) glioblastoma multiforme, (5) lung Squamous cell carcinoma and (6) pancreatic ductal adenocarcinoma (PDAC).

suppressMessages(library(plotly))
##### Generate PCA plots for distant tumour types
ref <- "mix"

target <- ref_datasets.list[[ref]][[2]]
pca.df <- ref_datasets.list[[ref]][[4]]$pca.df
importance_pca <- ref_datasets.list[[ref]][[4]]$importance_pca
targets.colour <- ref_datasets.list[[ref]][[4]]$targets
datasets.colour <- ref_datasets.list[[ref]][[4]]$datasets

##### Generate PCA 2-D plot (colour targets)
p1 <- plot_ly(pca.df, x = ~PC1, y = ~PC2, color = ~Target, text=paste(target$Target, rownames(pca.df), sep=": "), colors = targets.colour[[1]], type='scatter', mode = "markers", marker = list(size=10, opacity = 0.7, symbol="circle"), width = 800, height = 600) %>%
  layout(title = "", xaxis = list(title = paste( "PC1", " (",importance_pca["PC1"],")",sep="")), yaxis = list(title = paste( "PC2", " (",importance_pca["PC2"],")",sep="")), margin = list(l=50, r=50, b=50, t=20, pad=4), autosize = FALSE, showlegend = TRUE, legend = list(orientation = "v", y = 0.9))

p1
##### Generate PCA 2-D plot (colour datasets)
#p2 <- plot_ly(pca.df, x = ~PC1, y = ~PC2, color = ~Dataset, text=paste(target$Dataset, rownames(pca.df), sep=": "), colors = datasets.colour[[1]], type='scatter', mode = "markers", marker = list(size=10, opacity = 0.7, symbol="circle"), width = 800, height = 600) %>%
#  layout(title = "", xaxis = list(title = paste( "PC1", " (",importance_pca["PC1"],")",sep="")), yaxis = list(title = paste( "PC2", " (",importance_pca["PC2"],")",sep="")), margin = list(l=50, r=50, b=50, t=20, pad=4), autosize = FALSE, showlegend = TRUE, legend = list(orientation = "v", y = 0.9))

#p2

##### Save PCA plots as html
#htmlwidgets::saveWidget(as_widget(p1), paste0(normalizePath(params$report_dir), "/mix_PCA_2d_targets.html"), selfcontained = TRUE)

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

Gastrointestinal (core)

The expression profile of investigated sample (in red) was projected against core gastrointestinal tumours, including (1) esophageal carcinoma, (2) stomach adenocarcinoma, (3) colon adenocarcinoma and (4) rectum adenocarcinoma, as defined in the TCGA PanCan paper, as well as (5) pancreatic ductal adenocarcinoma (PDAC).

suppressMessages(library(plotly))
##### Generate PCA plots for core gastrointestinal tumours
ref <- "core_gastrointestinal"

target <- ref_datasets.list[[ref]][[2]]
pca.df <- ref_datasets.list[[ref]][[4]]$pca.df
importance_pca <- ref_datasets.list[[ref]][[4]]$importance_pca
targets.colour <- ref_datasets.list[[ref]][[4]]$targets
datasets.colour <- ref_datasets.list[[ref]][[4]]$datasets

##### Generate PCA 2-D plot (colour targets)
p1 <- plot_ly(pca.df, x = ~PC1, y = ~PC2, color = ~Target, text=paste(target$Target, rownames(pca.df), sep=": "), colors = targets.colour[[1]], type='scatter', mode = "markers", marker = list(size=10, opacity = 0.7, symbol="circle"), width = 800, height = 600) %>%
  layout(title = "", xaxis = list(title = paste( "PC1", " (",importance_pca["PC1"],")",sep="")), yaxis = list(title = paste( "PC2", " (",importance_pca["PC2"],")",sep="")), margin = list(l=50, r=50, b=50, t=20, pad=4), autosize = FALSE, showlegend = TRUE, legend = list(orientation = "v", y = 0.9))

p1
##### Save PCA plots as html
#htmlwidgets::saveWidget(as_widget(p1), paste0(normalizePath(params$report_dir), "/mix_PCA_2d_targets.html"), selfcontained = TRUE)

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

Gastrointestinal (developmental)

The expression profile of investigated sample (in red) was projected against developmental gastrointestinal tumours, including (1) liver hepatocellular carcinoma, (2) cholangiocarcinoma and (3) pancreatic ductal adenocarcinoma (PDAC), as defined in the TCGA PanCan paper.

suppressMessages(library(plotly))
##### Generate PCA plots for developmental gastrointestinal tumours
ref <- "developmental_gastrointestinal"

target <- ref_datasets.list[[ref]][[2]]
pca.df <- ref_datasets.list[[ref]][[4]]$pca.df
importance_pca <- ref_datasets.list[[ref]][[4]]$importance_pca
targets.colour <- ref_datasets.list[[ref]][[4]]$targets
datasets.colour <- ref_datasets.list[[ref]][[4]]$datasets

##### Generate PCA 2-D plot (colour targets)
p1 <- plot_ly(pca.df, x = ~PC1, y = ~PC2, color = ~Target, text=paste(target$Target, rownames(pca.df), sep=": "), colors = targets.colour[[1]], type='scatter', mode = "markers", marker = list(size=10, opacity = 0.7, symbol="circle"), width = 800, height = 600) %>%
  layout(title = "", xaxis = list(title = paste( "PC1", " (",importance_pca["PC1"],")",sep="")), yaxis = list(title = paste( "PC2", " (",importance_pca["PC2"],")",sep="")), margin = list(l=50, r=50, b=50, t=20, pad=4), autosize = FALSE, showlegend = TRUE, legend = list(orientation = "v", y = 0.9))

p1
##### Save PCA plots as html
#htmlwidgets::saveWidget(as_widget(p1), paste0(normalizePath(params$report_dir), "/mix_PCA_2d_targets.html"), selfcontained = TRUE)

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

Molecular classification

The projection of investigated sample in the context of TCGA PAAD samples classified based mRNA subtypes reported by Bailey et al. (2016), Moffitt et al. (2015) and Collisson et al. (2011). The molecular classification aids patient assignment into less heterogeneous and more appropriate group regarding the metastatic risk and the therapeutic response, with the consequences of better predicting evolution and better orienting the treatment. A recent report by Birnbaum DJ1 et al. (2018) reviews the association between PDAC molecular subtypes and drugs sensitivity.

Development notes

  • Use only set of genes associated with individual molecular classification

Bailey

suppressMessages(library(plotly))
##### Generate PCA plots for TCGA PAAD samples stratified according to Bailey classification
ref <- "bailey"

target <- ref_datasets.list[[ref]][[2]]
pca.df <- ref_datasets.list[[ref]][[4]]$pca.df
importance_pca <- ref_datasets.list[[ref]][[4]]$importance_pca
targets.colour <- ref_datasets.list[[ref]][[4]]$targets
datasets.colour <- ref_datasets.list[[ref]][[4]]$datasets

##### Generate PCA 2-D plot (colour targets)
p1 <- plot_ly(pca.df, x = ~PC1, y = ~PC2, color = ~Target, text=paste(target$Target, rownames(pca.df), sep=": "), colors = targets.colour[[1]], type='scatter', mode = "markers", marker = list(size=10, opacity = 0.7, symbol="circle"), width = 800, height = 600) %>%
  layout(title = "", xaxis = list(title = paste( "PC1", " (",importance_pca["PC1"],")",sep="")), yaxis = list(title = paste( "PC2", " (",importance_pca["PC2"],")",sep="")), margin = list(l=50, r=50, b=50, t=20, pad=4), autosize = FALSE, showlegend = TRUE, legend = list(orientation = "v", y = 0.9))


##### Generate PCA 3-D plot  (colour targets)
p2 <- plot_ly(pca.df, x = ~PC1, y = ~PC2, z = ~PC3, color = ~Target, text=paste(target$Target, rownames(pca.df), sep=": "), colors = targets.colour[[1]], type='scatter3d', mode = "markers", marker = list(size=8, opacity = 0.7, symbol="circle"), width = 800, height = 800) %>%
  layout(title = "", xaxis = list(title = paste( "PC1", " (",importance_pca["PC1"],")",sep="")), yaxis = list(title = paste( "PC2", " (",importance_pca["PC2"],")",sep="")), zaxis = list(title = paste( "PC3", " (",importance_pca["PC3"],")",sep="")) , margin = list(l=50, r=50, b=50, t=10, pad=4), autosize = FALSE, showlegend = TRUE, legend = list(orientation = "v", y = 0.9))

p1
p2
##### Save PCA plots as html
#htmlwidgets::saveWidget(as_widget(p1), paste0(normalizePath(params$report_dir), "/mix_PCA_2d_targets.html"), selfcontained = TRUE)
#htmlwidgets::saveWidget(as_widget(p2), paste0(normalizePath(params$report_dir), "/", "mix_PCA_3d_targets.html"), selfcontained = TRUE)

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

Collisson

suppressMessages(library(plotly))
##### Generate PCA plots for TCGA PAAD samples stratified according to Collisson classification
ref <- "collisson"

target <- ref_datasets.list[[ref]][[2]]
pca.df <- ref_datasets.list[[ref]][[4]]$pca.df
importance_pca <- ref_datasets.list[[ref]][[4]]$importance_pca
targets.colour <- ref_datasets.list[[ref]][[4]]$targets
datasets.colour <- ref_datasets.list[[ref]][[4]]$datasets

##### Generate PCA 2-D plot (colour targets)
p1 <- plot_ly(pca.df, x = ~PC1, y = ~PC2, color = ~Target, text=paste(target$Target, rownames(pca.df), sep=": "), colors = targets.colour[[1]], type='scatter', mode = "markers", marker = list(size=10, opacity = 0.7, symbol="circle"), width = 800, height = 600) %>%
  layout(title = "", xaxis = list(title = paste( "PC1", " (",importance_pca["PC1"],")",sep="")), yaxis = list(title = paste( "PC2", " (",importance_pca["PC2"],")",sep="")), margin = list(l=50, r=50, b=50, t=20, pad=4), autosize = FALSE, showlegend = TRUE, legend = list(orientation = "v", y = 0.9))

##### Generate PCA 3-D plot  (colour targets)
p2 <- plot_ly(pca.df, x = ~PC1, y = ~PC2, z = ~PC3, color = ~Target, text=paste(target$Target, rownames(pca.df), sep=": "), colors = targets.colour[[1]], type='scatter3d', mode = "markers", marker = list(size=8, opacity = 0.7, symbol="circle"), width = 800, height = 800) %>%
  layout(title = "", xaxis = list(title = paste( "PC1", " (",importance_pca["PC1"],")",sep="")), yaxis = list(title = paste( "PC2", " (",importance_pca["PC2"],")",sep="")), zaxis = list(title = paste( "PC3", " (",importance_pca["PC3"],")",sep="")) , margin = list(l=50, r=50, b=50, t=10, pad=4), autosize = FALSE, showlegend = TRUE, legend = list(orientation = "v", y = 0.9))

p1
p2
##### Save PCA plots as html
#htmlwidgets::saveWidget(as_widget(p1), paste0(normalizePath(params$report_dir), "/mix_PCA_2d_targets.html"), selfcontained = TRUE)
#htmlwidgets::saveWidget(as_widget(p2), paste0(normalizePath(params$report_dir), "/", "mix_PCA_3d_targets.html"), selfcontained = TRUE)

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

Moffitt

suppressMessages(library(plotly))
##### Generate PCA plots for TCGA PAAD samples stratified according to Moffitt classification
ref <- "moffitt"

target <- ref_datasets.list[[ref]][[2]]
pca.df <- ref_datasets.list[[ref]][[4]]$pca.df
importance_pca <- ref_datasets.list[[ref]][[4]]$importance_pca
targets.colour <- ref_datasets.list[[ref]][[4]]$targets
datasets.colour <- ref_datasets.list[[ref]][[4]]$datasets

##### Generate PCA 2-D plot (colour targets)
p1 <- plot_ly(pca.df, x = ~PC1, y = ~PC2, color = ~Target, text=paste(target$Target, rownames(pca.df), sep=": "), colors = targets.colour[[1]], type='scatter', mode = "markers", marker = list(size=10, opacity = 0.7, symbol="circle"), width = 800, height = 600) %>%
  layout(title = "", xaxis = list(title = paste( "PC1", " (",importance_pca["PC1"],")",sep="")), yaxis = list(title = paste( "PC2", " (",importance_pca["PC2"],")",sep="")), margin = list(l=50, r=50, b=50, t=20, pad=4), autosize = FALSE, showlegend = TRUE, legend = list(orientation = "v", y = 0.9))

##### Generate PCA 3-D plot  (colour targets)
p2 <- plot_ly(pca.df, x = ~PC1, y = ~PC2, z = ~PC3, color = ~Target, text=paste(target$Target, rownames(pca.df), sep=": "), colors = targets.colour[[1]], type='scatter3d', mode = "markers", marker = list(size=8, opacity = 0.7, symbol="circle"), width = 800, height = 800) %>%
  layout(title = "", xaxis = list(title = paste( "PC1", " (",importance_pca["PC1"],")",sep="")), yaxis = list(title = paste( "PC2", " (",importance_pca["PC2"],")",sep="")), zaxis = list(title = paste( "PC3", " (",importance_pca["PC3"],")",sep="")) , margin = list(l=50, r=50, b=50, t=10, pad=4), autosize = FALSE, showlegend = TRUE, legend = list(orientation = "v", y = 0.9))

p1
p2
##### Save PCA plots as html
#htmlwidgets::saveWidget(as_widget(p1), paste0(normalizePath(params$report_dir), "/mix_PCA_2d_targets.html"), selfcontained = TRUE)
#htmlwidgets::saveWidget(as_widget(p2), paste0(normalizePath(params$report_dir), "/", "mix_PCA_3d_targets.html"), selfcontained = TRUE)

##### Detach plotly package. Otherwise it clashes with other graphics devices
detach("package:plotly", unload=FALSE)

Addendum

devtools::session_info()
## Session info -------------------------------------------------------------
##  setting  value                       
##  version  R version 3.5.0 (2018-04-23)
##  system   x86_64, darwin15.6.0        
##  ui       X11                         
##  language (EN)                        
##  collate  en_AU.UTF-8                 
##  tz       Australia/Melbourne         
##  date     2018-08-22
## Packages -----------------------------------------------------------------
##  package        * version date       source        
##  assertthat       0.2.0   2017-04-11 CRAN (R 3.5.0)
##  backports        1.1.2   2017-12-13 CRAN (R 3.5.0)
##  base           * 3.5.0   2018-04-24 local         
##  bindr            0.1.1   2018-03-13 CRAN (R 3.5.0)
##  bindrcpp       * 0.2.2   2018-03-29 CRAN (R 3.5.0)
##  colorspace       1.3-2   2016-12-14 CRAN (R 3.5.0)
##  compiler         3.5.0   2018-04-24 local         
##  crayon           1.3.4   2017-09-16 CRAN (R 3.5.0)
##  crosstalk        1.0.0   2016-12-21 CRAN (R 3.5.0)
##  data.table       1.11.4  2018-05-27 CRAN (R 3.5.0)
##  datasets       * 3.5.0   2018-04-24 local         
##  devtools         1.13.6  2018-06-27 CRAN (R 3.5.0)
##  digest           0.6.15  2018-01-28 CRAN (R 3.5.0)
##  dplyr            0.7.6   2018-06-29 CRAN (R 3.5.1)
##  edgeR          * 3.22.3  2018-06-21 Bioconductor  
##  evaluate         0.11    2018-07-17 CRAN (R 3.5.0)
##  getopt           1.20.2  2018-02-16 CRAN (R 3.5.0)
##  ggplot2        * 3.0.0   2018-07-03 CRAN (R 3.5.0)
##  glue             1.3.0   2018-07-17 CRAN (R 3.5.0)
##  graphics       * 3.5.0   2018-04-24 local         
##  grDevices      * 3.5.0   2018-04-24 local         
##  grid             3.5.0   2018-04-24 local         
##  gtable           0.2.0   2016-02-26 CRAN (R 3.5.0)
##  htmltools        0.3.6   2017-04-28 CRAN (R 3.5.0)
##  htmlwidgets      1.2     2018-04-19 CRAN (R 3.5.0)
##  httpuv           1.4.5   2018-07-19 CRAN (R 3.5.0)
##  httr             1.3.1   2017-08-20 CRAN (R 3.5.0)
##  jsonlite         1.5     2017-06-01 CRAN (R 3.5.0)
##  knitr            1.20    2018-02-20 CRAN (R 3.5.0)
##  later            0.7.3   2018-06-08 CRAN (R 3.5.0)
##  lattice          0.20-35 2017-03-25 CRAN (R 3.5.0)
##  lazyeval         0.2.1   2017-10-29 CRAN (R 3.5.0)
##  limma          * 3.36.2  2018-06-21 Bioconductor  
##  locfit           1.5-9.1 2013-04-20 CRAN (R 3.5.0)
##  magrittr         1.5     2014-11-22 CRAN (R 3.5.0)
##  memoise          1.1.0   2017-04-21 CRAN (R 3.5.0)
##  methods        * 3.5.0   2018-04-24 local         
##  mime             0.5     2016-07-07 CRAN (R 3.5.0)
##  munsell          0.5.0   2018-06-12 CRAN (R 3.5.0)
##  optparse       * 1.6.0   2018-06-17 CRAN (R 3.5.0)
##  pander           0.6.2   2018-07-08 CRAN (R 3.5.0)
##  pillar           1.3.0   2018-07-14 CRAN (R 3.5.0)
##  pkgconfig        2.0.2   2018-08-16 CRAN (R 3.5.0)
##  plotly           4.8.0   2018-07-20 CRAN (R 3.5.0)
##  plyr             1.8.4   2016-06-08 CRAN (R 3.5.0)
##  preprocessCore * 1.42.0  2018-05-01 Bioconductor  
##  promises         1.0.1   2018-04-13 CRAN (R 3.5.0)
##  purrr            0.2.5   2018-05-29 CRAN (R 3.5.0)
##  R6               2.2.2   2017-06-17 CRAN (R 3.5.0)
##  rapportools    * 1.0     2014-01-07 CRAN (R 3.5.0)
##  Rcpp             0.12.18 2018-07-23 CRAN (R 3.5.0)
##  reshape        * 0.8.7   2017-08-06 CRAN (R 3.5.0)
##  rlang            0.2.2   2018-08-16 CRAN (R 3.5.0)
##  rmarkdown        1.10    2018-06-11 CRAN (R 3.5.0)
##  rprojroot        1.3-2   2018-01-03 CRAN (R 3.5.0)
##  scales           1.0.0   2018-08-09 CRAN (R 3.5.0)
##  shiny            1.1.0   2018-05-17 CRAN (R 3.5.0)
##  stats          * 3.5.0   2018-04-24 local         
##  stringi          1.2.4   2018-07-20 CRAN (R 3.5.0)
##  stringr          1.3.1   2018-05-10 CRAN (R 3.5.0)
##  tibble           1.4.2   2018-01-22 CRAN (R 3.5.0)
##  tidyr            0.8.1   2018-05-18 CRAN (R 3.5.0)
##  tidyselect       0.2.4   2018-02-26 CRAN (R 3.5.0)
##  tools            3.5.0   2018-04-24 local         
##  utils          * 3.5.0   2018-04-24 local         
##  viridisLite      0.3.0   2018-02-01 CRAN (R 3.5.0)
##  withr            2.1.2   2018-03-15 CRAN (R 3.5.0)
##  xtable           1.8-2   2016-02-05 CRAN (R 3.5.0)
##  yaml             2.2.0   2018-07-25 CRAN (R 3.5.0)